Load the libraries:
library(leaflet)
library(tmap)
library(sf)
library(tidyverse)
Get the data:
# DC boundary polygon. Note: read from the rasters/ directory --
# presumably intentional, but worth confirming.
dc <-
st_read('data/raw/rasters/dc.shp')
# Crime incident records (tabular; includes longitude/latitude columns
# used later by st_as_sf()):
dc_crimes <-
read_csv('data/raw/dc_crimes.csv')
# Census tract polygons (multiple states; subset to DC below):
census <-
st_read('data/raw/census/census.shp')
# Raster stack -- later code uses $nlcd, $canopy_cover, and
# $impervious_surface layers:
rasters <-
read_rds('data/raw/rasters/dc_lc.rds')
Dependencies for the questions:
# Violent offenses committed in 2020:
violent_crimes <-
  dc_crimes %>%
  filter(
    offense_group == 'violent',
    lubridate::year(date_time) == 2020)

# Promote the longitude/latitude columns to sf point geometry (WGS 84):
crimes_sp <-
  violent_crimes %>%
  st_as_sf(
    coords = c('longitude', 'latitude'),
    crs = 4326)
Subset the census data to just data in which the state_name is “DC”. Assign the object to your global environment with the name dc_census.
# Keep only Washington DC tracts (state FIPS code 11):
dc_census <-
  filter(census, STATEFP == 11)
Plot the data such that the fill color is defined by the size of the population.
# Choropleth of DC tracts, filled by population size:
ggplot(dc_census) +
  geom_sf(aes(fill = population)) +
  scale_fill_viridis_c(option = 'cividis') +
  theme_void()
Filter the data to just violent crimes that occurred in 2020. Assign the filtered object to your global environment with the name violent_crimes.
# Subset the crime records to violent offenses from 2020:
violent_crimes <-
  filter(
    dc_crimes,
    lubridate::year(date_time) == 2020,
    offense_group == 'violent')
Compare the size of the original object with that of the filtered object.
# Size of the filtered object, formatted in megabytes:
violent_crimes %>%
  object.size() %>%
  format(units = 'Mb')
## [1] "0.4 Mb"
# Size of the original, unfiltered object for comparison:
dc_crimes %>%
  object.size() %>%
  format(units = 'Mb')
## [1] "6.1 Mb"
Dependency:
# Trim the census attributes to the fields used downstream:
dc_census_reduced <-
  select(dc_census, GEOID, edu, income, population)
In a single purrr::map(), transform the projections of both objects as a list and assign the objects to your global environment as census_prj and crimes_prj.
# Project both layers to EPSG 5070 with a single map() call, then push
# each list element into the global environment under its new name.
list(crimes_sp,
     dc_census_reduced) %>%
  map(function(x) st_transform(x, crs = 5070)) %>%
  set_names('crimes_prj', 'census_prj') %>%
  list2env(.GlobalEnv)
## <environment: R_GlobalEnv>
Calculate the number of crimes per census tract and generate a map of the data, with census tracts colored by the number of violent crimes in 2020.
# Map the number of 2020 violent crimes per census tract.
census_prj %>%
  left_join(
    # Spatially join crime points to tracts, then tally per tract.
    st_join(
      census_prj,
      crimes_prj) %>%
      as_tibble() %>%
      group_by(GEOID) %>%
      # Count matched crimes only: st_join() is a left join, so a tract
      # with zero crimes still yields one row (crime fields all NA) and
      # n() would incorrectly report 1 for it.
      summarize(n = sum(!is.na(id))),
    by = 'GEOID') %>%
  ggplot() +
  geom_sf(aes(fill = n)) +
  scale_fill_viridis_c(option = 'cividis') +
  theme_void()
Dependency for the next question:
# Attach crimes to tracts, then simplify the polygons, keeping 5% of
# the vertices to shrink the object:
census_simple <-
  rmapshaper::ms_simplify(
    st_join(census_prj, crimes_prj),
    keep = 0.05)
Modify the DC Census data until the size of the resultant object is less than 4 Mb. Assign the object to your global environment as census_simpler. Then create a map where each census tract is colored by the number of violent crimes per 1000 people in 2020.
In a single map statement, transform crimes_prj and census_simpler to EPSG 4326. Save to your environment as crimes_4326 and census_4326, then remove crimes_prj and census_simpler from your global environment.
## <environment: R_GlobalEnv>
Filter crimes_4326 to just offenses classified as “robbery” and assign it to your global environment as “robberies”.
# Subset the crime points to robberies only:
robberies <-
  filter(crimes_4326, offense == 'robbery')
Dependency:
# Download an OpenStreetMap basemap covering the census extent:
basemap <-
  tmaptools::read_osm(st_bbox(census_4326))
You can add markers to a map using the tm_markers function. Without creating a new object, add markers representing homicides in Washington DC in 2020.
Filter the crimes data to where the offense is either robberies or homicides (but don’t assign an object to your global environment).
# Robberies and homicides, without assigning a new object:
filter(
  crimes_4326,
  offense %in% c('robbery', 'homicide'))
## Simple feature collection with 2194 features and 4 fields
## Geometry type: POINT
## Dimension: XY
## Bounding box: xmin: -77.10262 ymin: 38.81347 xmax: -76.91175 ymax: 38.98272
## Geodetic CRS: WGS 84
## # A tibble: 2,194 × 5
## id date_time offense offense_group geometry
## * <chr> <dttm> <chr> <chr> <POINT [°]>
## 1 20000205-01 2020-01-01 04:25:41 robbery violent (-76.96479 38.93453)
## 2 20000268-01 2020-01-01 07:21:17 robbery violent (-76.95198 38.89564)
## 3 20000409-01 2020-01-01 13:24:30 robbery violent (-76.98609 38.86694)
## 4 20000417-01 2020-01-01 14:05:17 robbery violent (-77.01757 38.89979)
## 5 20000483-01 2020-01-01 17:29:07 robbery violent (-77.02908 38.96992)
## 6 20000700-01 2020-01-01 23:34:57 robbery violent (-76.9943 38.92558)
## 7 20000725-01 2020-01-02 12:54:00 robbery violent (-76.92923 38.89458)
## 8 20001102-01 2020-01-02 18:06:38 robbery violent (-77.04168 38.91132)
## 9 20001173-01 2020-01-02 19:04:57 robbery violent (-77.02422 38.91807)
## 10 20001235-01 2020-01-02 21:14:07 robbery violent (-76.99308 38.92451)
## # … with 2,184 more rows
Some more raster tools for your toolkit …
We have learned how to reclassify a raster based on a reclass matrix. In many instances, we can reclassify much more simply with logic.
If we run a logical test on a raster, the values in a given raster cell will be replaced by 1 if the test evaluates to TRUE, or 0 if it evaluates to FALSE.
For example, if we wanted to define forested raster cells as any cell in which the percent canopy cover was 80% or higher, we could simply use:
# A logical test on a raster yields 1 where TRUE and 0 where FALSE.
# Here, "forest" = canopy cover of 80% or higher.
temp <-
rasters$canopy_cover >= 80
raster::plot(temp)
We can also use indexing and logic to modify the assignment of a single raster value (Note: This can also be done with the function raster::NAvalue(), but why learn another function?):
# Index-and-assign: convert the 0 (non-forest) cells to NA.
temp[temp == 0] <-
NA
raster::plot(temp)
I find this reassignment-with-indexing method overly clunky and old school, but it’s definitely quick and useful in a pinch!
Open water is classified as the value 11 in the NLCD raster. Using a logical statement, classify the raster such that all open water raster cells are given the value 0 and all non-water cells are given the value 1. Assign the raster to your global environment with the name land.
Modify your land raster such that all water raster cells are given the value NA and all non-water raster cells are given the value 1.
Plot the resultant land raster:
If we need to set raster values to NA a lot, this is a great case for creating our own custom function.
#' Replace a target raster value with NA.
#'
#' @param raster_object A raster (or any object supporting logical
#'   indexing and subassignment).
#' @param na_value The cell value to convert to NA (default 0).
#' @return The input object with matching cells set to NA.
raster_to_na <-
  function(raster_object, na_value = 0) {
    matches <- raster_object == na_value
    raster_object[matches] <- NA
    raster_object
  }
We can run this as such:
# Flag open-water cells (NLCD class 11), then convert the 0s to NA:
raster_to_na(rasters$nlcd == 11) %>%
raster::plot()
Or even in a piped statement:
# %>% binds more tightly than ==, so without the braces the pipe would
# apply to `41` alone; {} forces the comparison to evaluate first.
{rasters$nlcd == 41} %>%
raster_to_na() %>%
raster::plot()
Any guesses as to why we had to use {}? What did it do here?
We often want to classify continuous rasters into discrete categories. Here, we create a three column reclass matrix of the form:
| from | to | becomes |
|---|---|---|
| low value, class 1 | high value, class 1 | classified value, class 1 |
| low value, class 2 | high value, class 2 | classified value, class 2 |
| low value, class 3 | high value, class 3 | classified value, class 3 |
Let’s create a forest layer from the canopy_cover raster. Here, we’ll define forest as any pixel with a canopy cover of 80% or higher. We can create our matrix as:
# Reclass matrix: canopy cover 0-80 becomes 0, 80-100 becomes 1
# (boundary handling is set later via raster::reclassify() arguments).
tibble(
from = c(0, 80),
to = c(80, 100),
becomes = c(0, 1)) %>%
as.matrix()
## from to becomes
## [1,] 0 80 0
## [2,] 80 100 1
The tribble() tidyverse function allows for row-wise creation of tibbles. The results are the same, but tribble() is helpful because it clearly communicates the table being created:
# The same matrix, built row-wise with tribble() for readability:
tribble(
~ from, ~ to, ~becomes,
0, 80, 0,
80, 100, 1) %>%
as.matrix()
## from to becomes
## [1,] 0 80 0
## [2,] 80 100 1
When reclassifying a continuous raster, we need to specify what to do at boundaries. For example, the value 80 is a part of both reclass values 0 and 1. This is accomplished using the raster::reclassify() arguments include.lowest = and right =.
The include.lowest argument specifies whether the lowest value in the from column should be maintained if right = TRUE, or the highest value in the to column if right = FALSE. The input for the argument is a logical value, and the default is FALSE. The right argument specifies whether the intervals should be closed on the right. The input for the argument is a logical value, and the default is TRUE. This can be a bit of a mind puzzle. To see what these do, we can use the base R function cut() (upon which raster::reclassify() depends). Let's consider a simple tibble with the values 1 through 10:
# A simple tibble of the integers 1 through 10:
tibble(values = 1:10)
## # A tibble: 10 × 1
## values
## <int>
## 1 1
## 2 2
## 3 3
## 4 4
## 5 5
## 6 6
## 7 7
## 8 8
## 9 9
## 10 10
Our goal will be to classify these values such that values from 1 through 7 are classified as “low” and values from 8 through 10 are classified as “high”. Instead of specifying a matrix here, we define the breaks that define the classes:
# cut() with defaults builds the intervals (1,8] and (8,10], so the
# value 1 falls outside every interval and is classified NA.
tibble(values = 1:10) %>%
mutate(
class =
cut(
values,
breaks = c(1, 8, 10),
labels = c('low', 'high')))
## # A tibble: 10 × 2
## values class
## <int> <fct>
## 1 1 <NA>
## 2 2 low
## 3 3 low
## 4 4 low
## 5 5 low
## 6 6 low
## 7 7 low
## 8 8 low
## 9 9 high
## 10 10 high
What is wrong with the resultant object?
Let’s change this by setting include.lowest to TRUE:
# include.lowest = TRUE closes the first interval at 1 ([1,8]), so 1
# is now classified -- but note where 8 lands in the output below.
tibble(values = 1:10) %>%
mutate(
class =
cut(
values,
breaks = c(1, 8, 10),
labels = c('low', 'high'),
include.lowest = TRUE))
## # A tibble: 10 × 2
## values class
## <int> <fct>
## 1 1 low
## 2 2 low
## 3 3 low
## 4 4 low
## 5 5 low
## 6 6 low
## 7 7 low
## 8 8 low
## 9 9 high
## 10 10 high
What is wrong with the resultant object?
Let’s see what happens if we specify that values should be closed on the left instead (right = FALSE):
# right = FALSE closes intervals on the left: [1,8) and [8,10). Now 8
# is "high", but 10 falls outside every interval and becomes NA.
tibble(values = 1:10) %>%
mutate(
class =
cut(
values,
breaks = c(1, 8, 10),
labels = c('low', 'high'),
right = FALSE))
## # A tibble: 10 × 2
## values class
## <int> <fct>
## 1 1 low
## 2 2 low
## 3 3 low
## 4 4 low
## 5 5 low
## 6 6 low
## 7 7 low
## 8 8 high
## 9 9 high
## 10 10 <NA>
What is wrong with the resultant object?
Let’s see what happens when we specify include.lowest = TRUE and right = FALSE:
# include.lowest = TRUE together with right = FALSE yields [1,8) and
# [8,10], covering every value: 1-7 -> "low", 8-10 -> "high".
tibble(values = 1:10) %>%
mutate(
class =
cut(
values,
breaks = c(1, 8, 10),
labels = c('low', 'high'),
include.lowest = TRUE,
right = FALSE))
## # A tibble: 10 × 2
## values class
## <int> <fct>
## 1 1 low
## 2 2 low
## 3 3 low
## 4 4 low
## 5 5 low
## 6 6 low
## 7 7 low
## 8 8 high
## 9 9 high
## 10 10 high
Why did this work?
Let’s use this information to reclassify our canopy_cover raster:
# Reclassify canopy cover with the matrix built above. Because `.`
# appears as an argument, magrittr does not also insert the matrix as
# the first argument: the raster stays in position 1 and the matrix
# fills the rcl slot.
tribble(
~ from, ~ to, ~becomes,
0, 80, 0,
80, 100, 1) %>%
as.matrix() %>%
raster::reclassify(
rasters$canopy_cover,
.,
include.lowest = TRUE,
right = FALSE) %>%
raster::plot()
I often classify urban landscapes as low, medium, and high-intensity urban using impervious surface as a proxy for urban intensity. I define low intensity as cells with less than 10% impervious surface, medium intensity as cells with 10 - 60% impervious surface, and high intensity as cells with greater than 60 percent impervious surface.
Create a reclass matrix to classify the impervious_surface raster into low, medium and high-intensity urban land cover classes.
Use cut() to ensure that your reclass matrix will classify your raster as expected.
Reclassify the impervious_surface raster based on your reclass matrix and assign the resultant object to your global environment with the name urban_intensity.
Raster math (i.e., applying mathematical operators to raster objects) can be used to modify the values of one raster or compare between the values of multiple rasters.
For example, if we wanted to convert our impervious surface layer from a percentage, as it is currently expressed, to a proportional value, we divide the current raster values by 100:
# Raster math: divide every cell by 100, converting percent impervious
# surface to a proportion (0-1).
impervious_proportion <-
rasters$impervious_surface/100
raster::plot(impervious_proportion)
In a raster stack, all rasters have the same resolution, origin, extent, and CRS. This gives us the freedom to use math to compare rasters. For example, perhaps we want to convert any raster cell that contains water to NA. To remove open water from the impervious_proportion raster, we'll multiply the raster with our land raster (where all non-water values are 1 and all water values are NA).
# Multiply by the land raster (defined in the exercise above: water =
# NA, non-water = 1) to mask water cells to NA. NOTE(review): the text
# above says canopy_cover, but this operates on impervious_proportion
# -- confirm which layer was intended.
impervious_proportion_no_water <-
impervious_proportion*land
raster::plot(impervious_proportion_no_water)
Why would we want to use this?
Converting a raster file to a point file is often useful – especially because it allows us to plot rasters in ggplot. To do so, we use the raster::rasterToPoints() function.
Let’s convert impervious_proportion_no_water to a points object:
# Convert the masked raster to x/y coordinates plus cell values:
raster::rasterToPoints(impervious_proportion_no_water)
We could also write this as (my preference):
# The same call, written as a pipe:
impervious_proportion_no_water %>%
raster::rasterToPoints()
To plot this with ggplot(), we convert the object to a tibble:
# rasterToPoints() returns a matrix; as_tibble() makes it ggplot-ready:
impervious_proportion_no_water %>%
raster::rasterToPoints() %>%
as_tibble()
## # A tibble: 177,648 × 3
## x y layer
## <dbl> <dbl> <dbl>
## 1 1616040 1936140 0.08
## 2 1616070 1936140 0.11
## 3 1616100 1936140 0.1
## 4 1616040 1936110 0.25
## 5 1616070 1936110 0.38
## 6 1616100 1936110 0.27
## 7 1616130 1936110 0.1
## 8 1616010 1936080 0.07
## 9 1616040 1936080 0.33
## 10 1616070 1936080 0.47
## # … with 177,638 more rows
Our raster layer is named “layer”, which isn’t very informative. Since it’s a tibble, it’s easy to rename the field:
# Rename the generic "layer" column to something informative:
impervious_proportion_no_water %>%
raster::rasterToPoints() %>%
as_tibble() %>%
rename(impervious_cover = layer)
## # A tibble: 177,648 × 3
## x y impervious_cover
## <dbl> <dbl> <dbl>
## 1 1616040 1936140 0.08
## 2 1616070 1936140 0.11
## 3 1616100 1936140 0.1
## 4 1616040 1936110 0.25
## 5 1616070 1936110 0.38
## 6 1616100 1936110 0.27
## 7 1616130 1936110 0.1
## 8 1616010 1936080 0.07
## 9 1616040 1936080 0.33
## 10 1616070 1936080 0.47
## # … with 177,638 more rows
We can then plot the data using the geom_tile() function:
# Plot the cells as tiles; without fixed coordinates the aspect ratio
# is distorted (addressed in the next chunk).
impervious_proportion_no_water %>%
raster::rasterToPoints() %>%
as_tibble() %>%
rename(impervious_cover = layer) %>%
ggplot() +
geom_tile(aes(x = x, y = y, fill = impervious_cover))
Things are a bit askew. If we use the coord_equal() function, we can reshape the data:
# coord_equal() forces a 1:1 aspect ratio so cells render square:
impervious_proportion_no_water %>%
raster::rasterToPoints() %>%
as_tibble() %>%
rename(impervious_cover = layer) %>%
ggplot() +
geom_tile(aes(x = x, y = y, fill = impervious_cover)) +
coord_equal()
And, of course, I like theme_void():
# Same map, with the empty theme_void() background:
impervious_proportion_no_water %>%
raster::rasterToPoints() %>%
as_tibble() %>%
rename(impervious_cover = layer) %>%
ggplot() +
geom_tile(aes(x = x, y = y, fill = impervious_cover)) +
coord_equal() +
theme_void()
And better colors:
# Final version: magma viridis palette for the fill scale:
impervious_proportion_no_water %>%
raster::rasterToPoints() %>%
as_tibble() %>%
rename(impervious_cover = layer) %>%
ggplot() +
geom_tile(aes(x = x, y = y, fill = impervious_cover)) +
scale_fill_viridis_c(option = 'magma') +
coord_equal() +
theme_void()
Because of how many violent crimes there were in Washington DC in 2020, plotting our point data is not very informative:
# Raw point map of 2020 violent crimes over the DC boundary; the dense
# overplotting motivates rasterizing the points below.
ggplot(dc) +
geom_sf() +
geom_sf(data = crimes_prj) +
theme_void()
We can make a more informative map by converting the points to raster cells and counting the number of points per cell. For this, we use the function raster::rasterize():
# Count crime points per raster cell:
crimes_raster <-
crimes_prj %>%
# Drop all attribute columns, keeping just the geometry:
dplyr::select(-everything()) %>%
# Match the CRS of the target raster stack:
st_transform(crs = st_crs(rasters)) %>%
# Convert sf to a Spatial* object for raster::rasterize():
as_Spatial() %>%
raster::rasterize(
rasters, # These are the cells that the points are rasterized to
fun = 'count',
background = 0)
And we can plot the data using base R:
raster::plot(crimes_raster)
Of course, that plot wasn't very interesting, because the cells are so small. The raster::aggregate() function can be used to combine cells. The function includes the argument fact = where you provide a positive integer value that describes how the data are to be aggregated. Here, I specify the number 33, which will generate raster cells that are roughly 1km x 1km (Why?):
# Aggregate cells by a factor of 33 (roughly 1 km x 1 km, per the text
# above), summing the per-cell counts:
crimes_raster %>%
raster::aggregate(
fact = 33,
fun = sum) %>%
raster::plot()
In a single piped statement:
In a single piped statement, aggregate the crimes raster, convert it to points, and plot it with ggplot(), overlaying the dc shapefile. Just a brief bit of code …
# Static (plot-mode) tmap: OSM basemap, tracts colored by crimes per
# 1000 residents, crime points, legend drawn outside the frame.
tmap_mode('plot')
tm_shape(basemap) +
tm_rgb() +
tm_shape(census_4326) +
tm_polygons(col = 'crimes_per_1000') +
tm_shape(crimes_4326) +
tm_dots(size = 0.15) +
tm_layout(legend.outside = TRUE)
We can switch this to interactive plotting by just changing the mode to “view”!
# The identical layer stack, rendered interactively via view mode:
tmap_mode('view')
tm_shape(basemap) +
tm_rgb() +
tm_shape(census_4326) +
tm_polygons(col = 'crimes_per_1000') +
tm_shape(crimes_4326) +
tm_dots(size = 0.15) +
tm_layout(legend.outside = TRUE)
Some of our elements were unnecessary or unusable …
# View mode supplies its own basemap, so the static basemap layer and
# the legend layout call are dropped:
tmap_mode('view')
tm_shape(census_4326) +
tm_polygons(col = 'crimes_per_1000') +
tm_shape(crimes_4326) +
tm_dots(size = 0.15)
Let’s make our points smaller now …
# Shrink the dots from 0.15 to 0.05:
tm_shape(census_4326) +
tm_polygons(col = 'crimes_per_1000') +
tm_shape(crimes_4326) +
tm_dots(size = 0.05)
And add some transparency to our polygons …
# Half-transparent polygons so the basemap shows through:
tm_shape(census_4326) +
tm_polygons(col = 'crimes_per_1000',
alpha = 0.5) +
tm_shape(crimes_4326) +
tm_dots(size = 0.05)
We might want to color our dots by offense:
# Color the dots by offense type:
tm_shape(census_4326) +
tm_polygons(col = 'crimes_per_1000',
alpha = 0.5) +
tm_shape(crimes_4326) +
tm_dots(size = 0.05,
col = 'offense')
It’s hard to see anything going on with all of those dots …
# clustering = TRUE aggregates nearby dots into cluster markers,
# reducing the visual clutter:
tm_shape(census_4326) +
tm_polygons(col = 'crimes_per_1000',
alpha = 0.5) +
tm_shape(crimes_4326) +
tm_dots(size = 0.05,
col = 'offense',
clustering = TRUE)
We can also specify the type of crime as separate layers:
# Split homicides and non-homicides into separately toggleable layers,
# naming each layer via tm_shape(name = ...):
tm_shape(census_4326) +
tm_polygons(col = 'crimes_per_1000',
alpha = 0.5) +
tm_shape(
name = 'homicide',
filter(crimes_4326,
offense == 'homicide')) +
tm_dots(size = 0.05,
clustering = TRUE) +
tm_shape(
name = 'non-homicide',
filter(crimes_4326,
offense != 'homicide')) +
tm_dots(size = 0.05,
col = 'offense',
clustering = TRUE)
Generate an interactive map where each offense is given its own layer.